library(readxl)
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ ggplot2 3.3.5     ✓ purrr   0.3.4
## ✓ tibble  3.1.6     ✓ dplyr   1.0.8
## ✓ tidyr   1.2.0     ✓ stringr 1.4.0
## ✓ readr   2.1.2     ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(TSstudio)
library(MASS)
## 
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
## 
##     select
Gold <- read_xlsx("5010DATA.xlsx", sheet = "Gold")

Gold$Date <- as.Date(Gold$Date, format = "%Y-%m-%d")
#class(Gold$Date)
ts_plot(Gold[-nrow(Gold),-2], slider = TRUE, 
        title = "Monthly PricePercentage Change Gold Price From 2003-2022", 
        Xtitle = "Percentage Change", Ytitle = "Price(USD)")
ts_plot(Gold[,-3], slider = TRUE, 
        title = "Monthly Price Change Gold Price From 2003-2022", 
        Xtitle = "Time", Ytitle = "Price(USD)")
library(forecast)
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
library(survMisc)
## Loading required package: survival
## 
## Attaching package: 'survMisc'
## The following object is masked from 'package:forecast':
## 
##     autoplot
## The following object is masked from 'package:ggplot2':
## 
##     autoplot
library(zoo)
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
price <- ts(Gold$`Last Price`, frequency = 12, start = c(2003, 1), end = c(2022, 4))
price
##          Jan     Feb     Mar     Apr     May     Jun     Jul     Aug     Sep
## 2003  368.15  349.95  337.45  338.55  364.45  346.40  354.35  375.55  385.35
## 2004  402.45  396.15  426.45  386.75  395.55  394.25  391.05  409.85  418.25
## 2005  422.60  435.65  428.35  434.40  417.25  435.50  429.80  435.20  469.30
## 2006  568.90  561.55  583.65  654.43  645.20  615.85  636.75  627.30  598.30
## 2007  653.20  669.35  663.80  678.45  660.50  649.65  664.30  673.30  743.60
## 2008  926.10  973.90  916.90  877.55  886.50  925.40  914.10  831.15  870.95
## 2009  927.85  942.32  919.35  888.20  979.18  926.50  954.00  951.19 1007.70
## 2010 1081.20 1117.59 1113.25 1179.03 1216.30 1242.38 1181.00 1247.40 1308.54
## 2011 1332.68 1411.33 1432.20 1563.70 1535.73 1500.18 1627.05 1825.55 1623.79
## 2012 1737.76 1696.76 1668.15 1664.75 1560.51 1597.45 1614.58 1691.85 1772.25
## 2013 1663.70 1579.61 1597.50 1476.71 1387.80 1234.53 1325.07 1395.27 1329.03
## 2014 1244.55 1326.39 1284.01 1291.60 1249.68 1327.33 1282.59 1287.32 1208.15
## 2015 1283.79 1213.18 1183.57 1184.37 1190.58 1172.35 1095.80 1134.93 1115.09
## 2016 1118.21 1238.67 1232.75 1293.53 1215.32 1321.90 1351.28 1308.97 1315.87
## 2017 1210.72 1248.44 1249.20 1268.28 1268.92 1241.61 1269.44 1321.43 1279.75
## 2018 1345.14 1318.31 1325.48 1315.39 1298.51 1252.60 1224.15 1201.15 1190.88
## 2019 1321.25 1313.32 1292.38 1283.53 1305.58 1409.55 1413.78 1520.38 1472.49
## 2020 1589.16 1585.69 1577.18 1686.50 1730.27 1780.96 1975.86 1967.80 1885.82
## 2021 1847.65 1734.04 1707.71 1769.13 1906.87 1770.11 1814.19 1813.62 1756.95
## 2022 1797.17 1908.99 1937.44 1980.11                                        
##          Oct     Nov     Dec
## 2003  384.25  398.15  415.45
## 2004  428.55  450.95  438.45
## 2005  465.19  493.08  517.00
## 2006  606.60  648.00  636.70
## 2007  796.80  783.55  833.70
## 2008  723.85  818.05  882.05
## 2009 1045.45 1179.63 1096.98
## 2010 1359.40 1386.23 1421.40
## 2011 1714.70 1746.35 1564.91
## 2012 1720.65 1714.98 1675.35
## 2013 1323.06 1253.35 1201.64
## 2014 1172.94 1167.38 1184.37
## 2015 1142.11 1064.77 1061.10
## 2016 1277.21 1173.20 1147.50
## 2017 1271.45 1275.01 1302.80
## 2018 1214.76 1220.52 1282.49
## 2019 1512.99 1463.98 1517.27
## 2020 1878.81 1776.95 1898.36
## 2021 1783.38 1774.52 1829.20
## 2022
percent <- ts(Gold[-nrow(Gold),3], frequency = 12, start = c(2003, 1), end = c(2022, 4))
percent
##                Jan           Feb           Mar           Apr           May
## 2003 -0.0494363710 -0.0357193885  0.0032597422  0.0765027322 -0.0495266840
## 2004 -0.0156541185  0.0764861795 -0.0930941494  0.0227537169 -0.0032865630
## 2005  0.0308802650 -0.0167565706  0.0141239640 -0.0394797422  0.0437387657
## 2006 -0.0129196695  0.0393553557  0.1212713099 -0.0141038767 -0.0454897706
## 2007  0.0247244336 -0.0082916262  0.0220699006 -0.0264573661 -0.0164269493
## 2008  0.0516142965 -0.0585275696 -0.0429163486  0.0101988491  0.0438804287
## 2009  0.0155951932 -0.0243760082 -0.0338826345  0.1024318847 -0.0538001185
## 2010  0.0336570477 -0.0038833562  0.0590882551  0.0316107309  0.0214420784
## 2011  0.0590164180  0.0147874700  0.0918167854 -0.0178870627 -0.0231486003
## 2012 -0.0235935918 -0.0168615479 -0.0020381860 -0.0626160084  0.0236717483
## 2013 -0.0505439683  0.0113255804 -0.0756118936 -0.0602081654 -0.1104409857
## 2014  0.0657587080 -0.0319513868  0.0059111689 -0.0324558687  0.0621359068
## 2015 -0.0550012074 -0.0244069305  0.0006759212  0.0052432939 -0.0153118648
## 2016  0.1077257402 -0.0047793198  0.0493044007 -0.0604624555  0.0876970674
## 2017  0.0311550152  0.0006087597  0.0152737752  0.0005046204 -0.0215222394
## 2018 -0.0199458792  0.0054387815 -0.0076123367 -0.0128326960 -0.0353559079
## 2019 -0.0060018921 -0.0159443243 -0.0068478311  0.0171791855  0.0796351047
## 2020 -0.0021835435 -0.0053667489  0.0693135850  0.0259531574  0.0292960058
## 2021 -0.0614889184 -0.0151841941  0.0359662940  0.0778574780 -0.0717196243
## 2022  0.0622200460  0.0149031687  0.0220239078 -0.0494363710              
##                Jun           Jul           Aug           Sep           Oct
## 2003  0.0229503464  0.0598278538  0.0260950606 -0.0028545478  0.0361743656
## 2004 -0.0081166772  0.0480756936  0.0204953032  0.0246264196  0.0522692801
## 2005 -0.0130884041  0.0125639832  0.0783547794 -0.0087577243  0.0599539973
## 2006  0.0339368353 -0.0148409894 -0.0462298741  0.0138726391  0.0682492582
## 2007  0.0225506042  0.0135480957  0.1044111095  0.0715438408 -0.0166290161
## 2008 -0.0122109358 -0.0907449951  0.0478854599 -0.1688960331  0.1301374594
## 2009  0.0296815974 -0.0029454927  0.0594097919  0.0374615461  0.1283466450
## 2010 -0.0494051739  0.0562235394  0.0490139490  0.0388677457  0.0197366485
## 2011  0.0845698516  0.1219999385 -0.1105201172  0.0559863036  0.0184580393
## 2012  0.0107233403  0.0478576472  0.0475219434 -0.0291155311 -0.0032952663
## 2013  0.0733396515  0.0529783332 -0.0474746823 -0.0044919979 -0.0526884646
## 2014 -0.0337067647  0.0036878504 -0.0614998602 -0.0291437322 -0.0047402254
## 2015 -0.0652961999  0.0357090710 -0.0174812544  0.0242312280 -0.0677167698
## 2016  0.0222255844 -0.0313110532  0.0052713202 -0.0293798020 -0.0814353160
## 2017  0.0224144458  0.0409550668 -0.0315415875 -0.0064856417  0.0027999528
## 2018 -0.0227127575 -0.0187885472 -0.0085501394  0.0200523982  0.0047416774
## 2019  0.0030009578  0.0754006988 -0.0314987043  0.0275044313 -0.0323928116
## 2020  0.1094353607 -0.0040792364 -0.0416607379 -0.0037172159 -0.0542151681
## 2021  0.0249024072 -0.0003141898 -0.0312468985  0.0150431145 -0.0049680943
## 2022                                                                      
##                Nov           Dec
## 2003  0.0434509607 -0.0312913708
## 2004 -0.0277192593 -0.0361500741
## 2005  0.0485113977  0.1003868472
## 2006 -0.0174382716  0.0259148736
## 2007  0.0640035735  0.1108312343
## 2008  0.0782348267  0.0519244941
## 2009 -0.0700643422 -0.0143849478
## 2010  0.0253709702 -0.0624173350
## 2011 -0.1038966988  0.1104536363
## 2012 -0.0231081412 -0.0069537709
## 2013 -0.0412574301  0.0357095303
## 2014  0.0145539584  0.0839433623
## 2015 -0.0034467538  0.0538215060
## 2016 -0.0219058984  0.0550936819
## 2017  0.0217959075  0.0324992324
## 2018  0.0507734408  0.0302224579
## 2019  0.0364007705  0.0473811517
## 2020  0.0683249388 -0.0267125308
## 2021  0.0308139666 -0.0175103871
## 2022
components.p <- decompose(percent)
plot(components.p)

# bc <- boxcox(price ~ 1)
# lam <- bc$x[which.max(bc$y)]
# lam
# ts_plot(BoxCox(price, lam))

Notice what happens when lambda equals 1. In that case, our data shifts down but the shape of the data does not change. Therefore, if the optimal value for lambda is 1, then the data is already normally distributed, and the Box-Cox transformation is unnecessary.

library(ggplot2)
library(forecast)
library(tseries)
fit <- auto.arima(price)
checkresiduals(fit)

## 
##  Ljung-Box test
## 
## data:  Residuals from ARIMA(0,1,1) with drift
## Q* = 30.072, df = 22, p-value = 0.1167
## 
## Model df: 2.   Total lags used: 24
plot(forecast(fit, h = 60))

fitp <- auto.arima(percent)
predict(fitp, n.ahead = 12)
## $pred
##              Jan         Feb         Mar         Apr         May         Jun
## 2022                                                 0.015207273 0.007463333
## 2023 0.008291771 0.008291771 0.008291771 0.008291771                        
##              Jul         Aug         Sep         Oct         Nov         Dec
## 2022 0.008391013 0.008279882 0.008293195 0.008291600 0.008291791 0.008291768
## 2023                                                                        
## 
## $se
##             Jan        Feb        Mar        Apr        May        Jun
## 2022                                             0.04854813 0.04889524
## 2023 0.04890028 0.04890028 0.04890028 0.04890028                      
##             Jul        Aug        Sep        Oct        Nov        Dec
## 2022 0.04890021 0.04890028 0.04890028 0.04890028 0.04890028 0.04890028
## 2023
checkresiduals(fitp)

## 
##  Ljung-Box test
## 
## data:  Residuals from ARIMA(1,0,0) with non-zero mean
## Q* = 28.908, df = 22, p-value = 0.1475
## 
## Model df: 2.   Total lags used: 24
adf.test(percent) ###station
## Warning in adf.test(percent): p-value smaller than printed p-value
## 
##  Augmented Dickey-Fuller Test
## 
## data:  percent
## Dickey-Fuller = -5.3908, Lag order = 6, p-value = 0.01
## alternative hypothesis: stationary
plot(forecast(fitp, h = 30),include = 20)

percent %>% stl(s.window='periodic') %>% seasadj() -> ggadj
plot(forecast(auto.arima(ggadj), h=30), include = 20)
abline(h=0, col = "dark red")

The ACF plot of the residuals from the ARIMA(0,1,1) model shows that all autocorrelations are within the threshold limits, indicating that the residuals are behaving like white noise. A portmanteau test returns a large p-value, also suggesting that the residuals are white noise. Residuals are almost normally distributed. The autocorrelation plot shows that for the first 12 lags, all sample autocorrelations except those at lags 6 and 11 fall inside the 95 % confidence bounds indicating the residuals appear to be random and we do not need any transformation.

acf(Gold$`Last Price`)

acf(Gold$Percent) ##p=1

pacf(Gold$Percent) ##q=0

###No need to diff
##Agree with auto.arima
lambdap <- BoxCox.lambda(percent)
## Warning in guerrero(x, lower, upper): Guerrero's method for selecting a Box-Cox
## parameter (lambda) is given for strictly positive data.
ndiffs(BoxCox(percent, lambdap)) ###d=0
## [1] 0
lambda <- BoxCox.lambda(price)
ndiffs(BoxCox(price,lambda)) ###d=1
## [1] 1
diff <- diff(Gold$`Last Price`)
ts.plot(diff)

acf(diff) ###p=5

pacf(diff) ###q=5